# Setup

#Libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.1.3
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift

#Preparing the Data

# Load the training data; readr's column-spec report is kept below.
df_all <- readr::read_csv("final_project_train.csv", col_names = TRUE)
## Rows: 677 Columns: 38
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (3): region, customer, outcome
## dbl (35): rowid, xb_01, xb_02, xb_03, xn_01, xn_02, xn_03, xa_01, xa_02, xa_...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One-hot encode the two categorical inputs. fastDummies appends indicator
# columns (region_XX, ..., customer_Q) and keeps the original character
# columns in place.
df_all <- dummy_cols(df_all, select_columns = 'region')
df_all <- dummy_cols(df_all, select_columns = 'customer')
# Log-transform the regression target.
# NOTE(review): assumes response > 0 for every row -- confirm; otherwise
# log() yields -Inf/NaN that would propagate into every lm() fit below.
df_all$log_response <- log(df_all$response)
# Recode the binary classification target: 1 = 'event', 0 = otherwise.
df_all<-df_all %>% 
  mutate(outcome = ifelse(outcome == 'event', 1,0))
# Print the prepared tibble as a sanity check.
df_all
## # A tibble: 677 x 51
##    rowid region customer xb_01 xb_02 xb_03 xn_01 xn_02 xn_03 xa_01 xa_02 xa_03
##    <dbl> <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1 XX     B         4        4     4 3         3     3 12       12    12
##  2     3 XX     B         1        1     1 2         2     2  3        3     3
##  3     4 XX     B         2        2     2 2         4     0  9        9     9
##  4     5 XX     B         2.52    11    -6 1.53      9    -3  7.08    29    -7
##  5     8 XX     B         2.55     6    -1 0.839     3    -4  6.45    17    -2
##  6     9 XX     B         3.07     6     1 1.86      8    -2  6.86    18     2
##  7    11 XX     B         3.09    10    -4 1.51      6    -5  7.66    24    -9
##  8    14 XX     B         3.71    12    -4 1.59     10    -6  8.16    27    -5
##  9    15 XX     B         2.70     9    -2 1.17     10    -3  6.89    20    -3
## 10    16 XX     B         2.82    10    -4 1.02      4    -5  7.14    19    -3
## # ... with 667 more rows, and 39 more variables: xb_04 <dbl>, xb_05 <dbl>,
## #   xb_06 <dbl>, xb_07 <dbl>, xb_08 <dbl>, xn_04 <dbl>, xn_05 <dbl>,
## #   xn_06 <dbl>, xn_07 <dbl>, xn_08 <dbl>, xa_04 <dbl>, xa_05 <dbl>,
## #   xa_06 <dbl>, xa_07 <dbl>, xa_08 <dbl>, xw_01 <dbl>, xw_02 <dbl>,
## #   xw_03 <dbl>, xs_01 <dbl>, xs_02 <dbl>, xs_03 <dbl>, xs_04 <dbl>,
## #   xs_05 <dbl>, xs_06 <dbl>, response <dbl>, outcome <dbl>, region_XX <int>,
## #   region_YY <int>, region_ZZ <int>, customer_A <int>, customer_B <int>, ...

#Model 1 Categorical Linear Additive

# Linear model on the categorical indicator columns only.
# NOTE(review): every dummy level of both region and customer is included
# alongside the intercept, so the design matrix is rank-deficient; lm()
# will report NA for the aliased coefficients. Dropping one level per
# group (or modeling the original factor columns) avoids this.
mod1 <- lm( log_response ~ region_XX + region_YY + region_ZZ + customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q, data = df_all)

#Model 2 Continuous: Linear Additive

# Additive linear model on every continuous input. The predictor names
# follow a fixed pattern, so the formula is assembled programmatically
# (same terms, same order as listing them by hand).
x_main_terms <- c(
  sprintf("xa_%02d", 1:8),
  sprintf("xb_%02d", 1:8),
  sprintf("xs_%02d", 1:6),
  sprintf("xn_%02d", 1:8),
  sprintf("xw_%02d", 1:3)
)
mod2 <- lm(reformulate(x_main_terms, response = "log_response"),
           data = df_all)

#Model 3: Categorical and Continuous Linear Additive

# Additive linear model combining the categorical indicators with every
# continuous input.
# FIX: the original fit modeled `response`, while every other model in
# this comparison uses `log_response`; mixing response scales makes the
# glance() AIC/BIC/logLik comparison below meaningless (its sigma was on
# a visibly different scale). The target is now log_response, consistent
# with mod1, mod2, and mod4-mod9.
# NOTE(review): as in mod1, all dummy levels are included, so lm() will
# alias (NA) the redundant indicator columns.
mod3 <- lm( log_response ~ 
              region_XX + region_YY + region_ZZ + 
              customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M + 
              customer_Other + customer_Q + xa_01 + 
              xa_02 + xa_03 + xa_04 + xa_05 + xa_06 + xa_07 + xa_08 +
              xb_01 + xb_02 +  xb_03 + xb_04 + xb_05 + xb_06 +  xb_07 + xb_08 +
              xs_01 + xs_02 + xs_03 + xs_04 + xs_05 + xs_06 +
              xn_01 + xn_02 + xn_03 + xn_04 + xn_05 + xn_06 + xn_07 + xn_08 +
              xw_01 + xw_02 + xw_03, data = df_all)

#Model 4: Categorical vs Continuous Interactions Linear Additive: Region

# Every continuous input interacted with the region indicators. The long
# hand-written term list is replaced by a programmatically built formula
# (same terms, same order).
x_all_terms <- paste(
  c(sprintf("xa_%02d", 1:8),
    sprintf("xb_%02d", 1:8),
    sprintf("xs_%02d", 1:6),
    sprintf("xn_%02d", 1:8),
    sprintf("xw_%02d", 1:3)),
  collapse = " + "
)
mod4 <- lm(
  as.formula(paste0("log_response ~ (", x_all_terms,
                    ") * (region_XX + region_YY + region_ZZ)")),
  data = df_all
)

#Model 5: Categorical vs Continuous Interactions Linear Additive: Customer

# Every continuous input interacted with the customer indicators.
x_all_terms <- paste(
  c(sprintf("xa_%02d", 1:8),
    sprintf("xb_%02d", 1:8),
    sprintf("xs_%02d", 1:6),
    sprintf("xn_%02d", 1:8),
    sprintf("xw_%02d", 1:3)),
  collapse = " + "
)
customer_sum <- paste(
  c("customer_A", "customer_B", "customer_D", "customer_E", "customer_G",
    "customer_K", "customer_M", "customer_Other", "customer_Q"),
  collapse = " + "
)
mod5 <- lm(
  as.formula(paste0("log_response ~ (", x_all_terms, ") * (", customer_sum, ")")),
  data = df_all
)

#Model 6: All Pairwise Interaction

# All main effects of the continuous inputs plus every pairwise
# interaction among them, via the ()^2 formula operator.
x_all_terms <- paste(
  c(sprintf("xa_%02d", 1:8),
    sprintf("xb_%02d", 1:8),
    sprintf("xs_%02d", 1:6),
    sprintf("xn_%02d", 1:8),
    sprintf("xw_%02d", 1:3)),
  collapse = " + "
)
mod6 <- lm(
  as.formula(paste0("log_response ~ (", x_all_terms, ")^2")),
  data = df_all
)

#Model 7

# Natural-spline basis expansion (df = 3) of every continuous input,
# additive (no interactions). Keeping the df_* symbols inside the term
# labels preserves the same coefficient names as the hand-written form.
df_a <- 3
df_b <- 3
df_s <- 3
df_n <- 3
df_w <- 3

ns_terms <- c(
  sprintf("splines::ns(xa_%02d, df_a)", 1:8),
  sprintf("splines::ns(xb_%02d, df_b)", 1:8),
  sprintf("splines::ns(xs_%02d, df_s)", 1:6),
  sprintf("splines::ns(xn_%02d, df_n)", 1:8),
  sprintf("splines::ns(xw_%02d, df_w)", 1:3)
)
mod7 <- lm(reformulate(ns_terms, response = "log_response"),
           data = df_all)

#Model 8 - Splines Interact Customer

# Natural-spline expansion (df = 3) of every continuous input, fully
# interacted with the customer indicator columns.
df_a <- 3
df_b <- 3
df_s <- 3
df_n <- 3
df_w <- 3

spline_part <- paste(
  c(sprintf("splines::ns(xa_%02d, df_a)", 1:8),
    sprintf("splines::ns(xb_%02d, df_b)", 1:8),
    sprintf("splines::ns(xs_%02d, df_s)", 1:6),
    sprintf("splines::ns(xn_%02d, df_n)", 1:8),
    sprintf("splines::ns(xw_%02d, df_w)", 1:3)),
  collapse = " + "
)
customer_part <- paste(
  c("customer_A", "customer_B", "customer_D", "customer_E", "customer_G",
    "customer_K", "customer_M", "customer_Other", "customer_Q"),
  collapse = " + "
)
mod8 <- lm(
  as.formula(paste0("log_response ~ (", spline_part, ") * (", customer_part, ")")),
  data = df_all
)

#Model 9 - Splines Interact Region

# Natural-spline expansion (df = 3) of every continuous input, fully
# interacted with the region indicator columns.
df_a <- 3
df_b <- 3
df_s <- 3
df_n <- 3
df_w <- 3

spline_part <- paste(
  c(sprintf("splines::ns(xa_%02d, df_a)", 1:8),
    sprintf("splines::ns(xb_%02d, df_b)", 1:8),
    sprintf("splines::ns(xs_%02d, df_s)", 1:6),
    sprintf("splines::ns(xn_%02d, df_n)", 1:8),
    sprintf("splines::ns(xw_%02d, df_w)", 1:3)),
  collapse = " + "
)
region_part <- "region_XX + region_YY + region_ZZ"
mod9 <- lm(
  as.formula(paste0("log_response ~ (", spline_part, ") * (", region_part, ")")),
  data = df_all
)

#Model Evaluation

# Collect one-row glance() summaries for all nine regression fits so the
# models can be compared side by side (r.squared, AIC, BIC, ...).
# FIX: renamed `mod_performace` -> `mod_performance` (typo) and added
# spaces around the assignment operator.
mod_performance <- bind_rows(
  broom::glance(mod1),
  broom::glance(mod2),
  broom::glance(mod3),
  broom::glance(mod4),
  broom::glance(mod5),
  broom::glance(mod6),
  broom::glance(mod7),
  broom::glance(mod8),
  broom::glance(mod9)
)

mod_performance
## # A tibble: 9 x 12
##   r.squared adj.r.squared sigma statistic   p.value    df  logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>     <dbl> <dbl>   <dbl> <dbl> <dbl>
## 1     0.182         0.170 0.487     14.8  4.93e- 24    10  -468.   961. 1015.
## 2     0.542         0.519 0.371     23.1  1.16e- 87    33  -272.   614.  772.
## 3     0.525         0.493 1.25      16.3  3.20e- 76    43 -1086.  2263. 2466.
## 4     0.727         0.679 0.303     15.2  6.28e-110   101   -96.7  399.  865.
## 5     0.821         0.679 0.303      5.79 3.41e- 55   299    46.1  510. 1870.
## 6     0.924         0.556 0.357      2.51 6.53e-  9   561   338.   451. 2994.
## 7     0.619         0.554 0.357      9.47 1.30e- 72    99  -210.   622. 1078.
## 8     0.932         0.711 0.288      4.21 1.27e- 22   517   373.   291. 2636.
## 9     0.839         0.711 0.288      6.56 2.32e- 62   299    81.3  439. 1799.
## # ... with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

##Top 3 Model Coef Plot

#Best Model

# Reasoning: I focus on the strength of association between the predictors
# and the response. Among the models with the highest R-squared values, the
# most influential predictors can be identified from plot(varImp(mod8)).

# Mod 8
coefplot::coefplot(mod8)

#Second Best Model

# Mod 6
coefplot::coefplot(mod6)

#Third Best Model

# Mod 9 (this comment previously said "Mod 4", but the call plots mod9)
coefplot::coefplot(mod9)

# How do the coefficient summaries compare across the three models?

# Which inputs seem important?

# Repeated 5-fold cross-validation (3 repeats); predictions are saved so
# resampled performance can be inspected later. Seed fixed for
# reproducibility of the fold assignments.
set.seed(12345)
my_ctrl <- caret::trainControl(method = "repeatedcv",
                               number = 5,
                               repeats = 3,
                               savePredictions = TRUE)
my_metric <- "Rsquared"
# Refit the spline-by-customer model (same formula as the lm mod8 above)
# with caret so cross-validated performance and variable importance can be
# extracted. NOTE(review): this OVERWRITES the earlier lm object `mod8`.
mod8<- train(log_response ~ 
              (splines::ns(xa_01, 3) + splines::ns(xa_02, 3) +
              splines::ns(xa_03, 3) + splines::ns(xa_04, 3) +
              splines::ns(xa_05, 3) + splines::ns(xa_06, 3) +
              splines::ns(xa_07, 3) + splines::ns(xa_08, 3) +
                
              splines::ns(xb_01, 3) + splines::ns(xb_02, 3) + 
              splines::ns(xb_03, 3) + splines::ns(xb_04, 3) +
              splines::ns(xb_05, 3) + splines::ns(xb_06, 3) +
              splines::ns(xb_07, 3) + splines::ns(xb_08, 3) + 
                
              splines::ns(xs_01, 3) + splines::ns(xs_02, 3) +
              splines::ns(xs_03, 3) + splines::ns(xs_04, 3) +
              splines::ns(xs_05, 3) + splines::ns(xs_06, 3) +
                
              splines::ns(xn_01, 3) + splines::ns(xn_02, 3) +
              splines::ns(xn_03, 3) + splines::ns(xn_04, 3) +
              splines::ns(xn_05, 3) + splines::ns(xn_06, 3) +
              splines::ns(xn_07, 3) + splines::ns(xn_08, 3) +
                
              splines::ns(xw_01, 3) + splines::ns(xw_02, 3) +
              splines::ns(xw_03, 3)) *
                
  (customer_A + customer_B + customer_D + customer_E + customer_G +
  customer_K + customer_M +  customer_Other + customer_Q), 
                    data = df_all, 
                    method = "lm", 
                    metric = my_metric, 
                    preProcess = c("center" , "scale"),
                    trControl = my_ctrl,
                    # NOTE(review): `trace` is not an lm() argument -- confirm
                    # it is needed here or remove it.
                    trace = FALSE)
# Show the ten most important predictors of the cross-validated fit.
plot(varImp(mod8), top = 10)

###CLASSIFICATION

#Model 1: Categorical Linear Additive

# Logistic regression on the categorical indicator columns only (the
# outcome was recoded to 0/1 above).
# NOTE(review): as in the regression mod1, every dummy level of both
# groups is included alongside the intercept, so glm() will alias the
# redundant columns.
mod1 <- glm( outcome ~ region_XX + region_YY + region_ZZ + customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q, family = "binomial", data = df_all)

#Model 2: Continuous Linear Additive

# Additive logistic regression on every continuous input; the formula is
# assembled from the patterned predictor names (same terms, same order).
xc_terms <- c(
  sprintf("xa_%02d", 1:8),
  sprintf("xb_%02d", 1:8),
  sprintf("xs_%02d", 1:6),
  sprintf("xn_%02d", 1:8),
  sprintf("xw_%02d", 1:3)
)
mod2 <- glm(reformulate(xc_terms, response = "outcome"),
            family = "binomial", data = df_all)

#Model 3: Continuous and Categorical Linear Additive

# Additive logistic regression on the categorical indicators followed by
# every continuous input (same term order as listing them by hand).
dummy_terms <- c(
  "region_XX", "region_YY", "region_ZZ",
  "customer_A", "customer_B", "customer_D", "customer_E", "customer_G",
  "customer_K", "customer_M", "customer_Other", "customer_Q"
)
xc_terms <- c(
  sprintf("xa_%02d", 1:8),
  sprintf("xb_%02d", 1:8),
  sprintf("xs_%02d", 1:6),
  sprintf("xn_%02d", 1:8),
  sprintf("xw_%02d", 1:3)
)
mod3 <- glm(reformulate(c(dummy_terms, xc_terms), response = "outcome"),
            family = "binomial", data = df_all)

#Model 4: Categorical vs Continuous Interactions Linear Additive: Region

# Each group of continuous inputs interacted with the region indicators.
# The five (group) * (region) products are generated programmatically in
# the same order as the hand-written original.
group_sums <- c(
  paste(sprintf("xa_%02d", 1:8), collapse = " + "),
  paste(sprintf("xb_%02d", 1:8), collapse = " + "),
  paste(sprintf("xs_%02d", 1:6), collapse = " + "),
  paste(sprintf("xn_%02d", 1:8), collapse = " + "),
  paste(sprintf("xw_%02d", 1:3), collapse = " + ")
)
region_sum <- "region_XX + region_YY + region_ZZ"
rhs4 <- paste(sprintf("(%s) * (%s)", group_sums, region_sum),
              collapse = " + ")
mod4 <- glm(as.formula(paste("outcome ~", rhs4)),
            family = "binomial", data = df_all)

#Model 5: Categorical vs Continuous Interactions Linear Additive: Customer

# Logistic regression with each group of continuous inputs interacted with
# the customer indicators -- a very large number of coefficients relative
# to the 677 observations.
mod5 <- glm( outcome ~ 
              
              (xa_01 +xa_02 + xa_03 + xa_04 + xa_05 + xa_06 + xa_07 + xa_08) *
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q) +
              
              (xb_01 + xb_02 +  xb_03 + xb_04 + xb_05 + xb_06 +  xb_07 + xb_08) * 
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q) +
              
              (xs_01 + xs_02 + xs_03 + xs_04 + xs_05 + xs_06)*
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q) +
              
              (xn_01 + xn_02 + xn_03 + xn_04 + xn_05 + xn_06 + xn_07 + xn_08)*
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q) +
              
              (xw_01 + xw_02 + xw_03)*
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q), family = "binomial",data = df_all)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the warnings above typically indicate (quasi-)complete
# separation -- the model classifies some observations perfectly, so the
# coefficient estimates and standard errors are not trustworthy.

#Model 6: All Pairwise Interaction

# Logistic regression with all main effects and every pairwise interaction
# among the continuous inputs (via the ()^2 formula operator).
mod6 <- glm( outcome ~ (xa_01 + 
              xa_02 + xa_03 + xa_04 + xa_05 + xa_06 + xa_07 + xa_08 +
              
              xb_01 + xb_02 +  xb_03 + xb_04 + xb_05 + xb_06 +  xb_07 + xb_08 +
              
              xs_01 + xs_02 + xs_03 + xs_04 + xs_05 + xs_06 +
              
              xn_01 + xn_02 + xn_03 + xn_04 + xn_05 + xn_06 + xn_07 + xn_08 +
              
              xw_01 + xw_02 + xw_03)^2 , family = "binomial", data = df_all)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): as with mod5, these warnings typically indicate
# (quasi-)complete separation; the near-zero deviance for this model in
# the comparison table below is a symptom of a perfect (overfit) fit, not
# of a good model.

#Model 7: Spline of Continuous

# Additive logistic regression on natural-spline (df = 2) expansions of
# every continuous input. Keeping the df_* symbols in the term labels
# preserves the same coefficient names as the hand-written form.
df_a <- 2
df_b <- 2
df_s <- 2
df_n <- 2
df_w <- 2

ns_terms <- c(
  sprintf("splines::ns(xa_%02d, df_a)", 1:8),
  sprintf("splines::ns(xb_%02d, df_b)", 1:8),
  sprintf("splines::ns(xs_%02d, df_s)", 1:6),
  sprintf("splines::ns(xn_%02d, df_n)", 1:8),
  sprintf("splines::ns(xw_%02d, df_w)", 1:3)
)
mod7 <- glm(reformulate(ns_terms, response = "outcome"),
            family = "binomial", data = df_all)

#Model 8: Spline of Continuous Interact Customer
# (header fixed: the original said "Interact Region", but the model below
# interacts the splines with the CUSTOMER indicators)

df_a <- 2
df_b <- 2
df_s <- 2
df_n <- 2
df_w <- 2

# Natural-spline (df = 2) expansions of every continuous input, fully
# interacted with the customer indicators.
# NOTE(review): in the glance table below this model's deviance (5.70e3)
# exceeds the null deviance (654.), a sign the fit is badly behaved /
# overparameterized -- treat its estimates with caution.
mod8 <- glm( outcome ~ 
              
              (splines::ns(xa_01, df_a) +splines::ns(xa_02, df_a) + splines::ns(xa_03, df_a) + splines::ns(xa_04, df_a) +
              splines::ns(xa_05, df_a) + splines::ns(xa_06, df_a) + splines::ns(xa_07, df_a) + splines::ns(xa_08, df_a) +
              
              splines::ns(xb_01, df_b) + splines::ns(xb_02, df_b) +  splines::ns(xb_03, df_b) + splines::ns(xb_04, df_b) +
              splines::ns(xb_05, df_b) + splines::ns(xb_06, df_b) +  splines::ns(xb_07, df_b) + splines::ns(xb_08, df_b) + 
              
              splines::ns(xs_01, df_s) + splines::ns(xs_02, df_s) + splines::ns(xs_03, df_s) + splines::ns(xs_04, df_s) +
              splines::ns(xs_05, df_s) + splines::ns(xs_06, df_s) +
              
              splines::ns(xn_01, df_n) + splines::ns(xn_02, df_n) + splines::ns(xn_03, df_n) + splines::ns(xn_04, df_n) +
              splines::ns(xn_05, df_n) + splines::ns(xn_06, df_n) + splines::ns(xn_07, df_n) + splines::ns(xn_08, df_n) +
              
              splines::ns(xw_01, df_w) + splines::ns(xw_02, df_w) + splines::ns(xw_03, df_w)) *
  (customer_A + customer_B + customer_D + customer_E + customer_G + customer_K + customer_M +  customer_Other + customer_Q),
    
    family = "binomial", data = df_all)

#Model 9: Spline of Continuous Interact Region
# (header fixed: the original said "Interact Customer", but the model
# below interacts the splines with the REGION indicators)

df_a <- 2
df_b <- 2
df_s <- 2
df_n <- 2
df_w <- 2

# Natural-spline (df = 2) expansions of every continuous input, fully
# interacted with the region indicators.
# NOTE(review): in the glance table below this model's deviance (6.20e3)
# exceeds the null deviance (654.) -- treat its estimates with caution.
mod9 <- glm( outcome ~ 
              
              (splines::ns(xa_01, df_a) +splines::ns(xa_02, df_a) + splines::ns(xa_03, df_a) + splines::ns(xa_04, df_a) +
              splines::ns(xa_05, df_a) + splines::ns(xa_06, df_a) + splines::ns(xa_07, df_a) + splines::ns(xa_08, df_a) +
              
              splines::ns(xb_01, df_b) + splines::ns(xb_02, df_b) +  splines::ns(xb_03, df_b) + splines::ns(xb_04, df_b) +
              splines::ns(xb_05, df_b) + splines::ns(xb_06, df_b) +  splines::ns(xb_07, df_b) + splines::ns(xb_08, df_b) + 
              
              splines::ns(xs_01, df_s) + splines::ns(xs_02, df_s) + splines::ns(xs_03, df_s) + splines::ns(xs_04, df_s) +
              splines::ns(xs_05, df_s) + splines::ns(xs_06, df_s) +
              
              splines::ns(xn_01, df_n) + splines::ns(xn_02, df_n) + splines::ns(xn_03, df_n) + splines::ns(xn_04, df_n) +
              splines::ns(xn_05, df_n) + splines::ns(xn_06, df_n) + splines::ns(xn_07, df_n) + splines::ns(xn_08, df_n) +
              
              splines::ns(xw_01, df_w) + splines::ns(xw_02, df_w) + splines::ns(xw_03, df_w)) *
  (region_XX + region_YY + region_ZZ),
    
    family = "binomial", data = df_all)

#Model Evaluation

# Collect one-row glance() summaries for all nine classification fits so
# the models can be compared side by side (deviance, AIC, BIC, ...).
# FIX: renamed `mod_performace` -> `mod_performance` (typo) and added
# spaces around the assignment operator.
mod_performance <- bind_rows(
  broom::glance(mod1),
  broom::glance(mod2),
  broom::glance(mod3),
  broom::glance(mod4),
  broom::glance(mod5),
  broom::glance(mod6),
  broom::glance(mod7),
  broom::glance(mod8),
  broom::glance(mod9)
)

mod_performance
## # A tibble: 9 x 8
##   null.deviance df.null   logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>    <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          654.     676 -3.11e+2  644.  693.  6.22e+2         666   677
## 2          654.     676 -2.14e+2  496.  650.  4.28e+2         643   677
## 3          654.     676 -2.00e+2  489.  688.  4.01e+2         633   677
## 4          654.     676 -1.86e+2  575. 1036.  3.71e+2         575   677
## 5          654.     676 -2.52e+3 5646. 7001.  5.05e+3         377   677
## 6          654.     676 -3.50e-9 1124. 3663.  7.01e-9         115   677
## 7          654.     676 -1.95e+2  524.  827.  3.90e+2         610   677
## 8          654.     676 -2.85e+3 6600. 8642.  5.70e+3         225   677
## 9          654.     676 -3.10e+3 6602. 7510.  6.20e+3         476   677

##Top 3 Model Coef Plot

#Best Model

# Mod 9
# NOTE(review): mod5, mod8, and mod9 have the LARGEST AIC values in the
# table above (mod3 has the smallest) -- confirm the criterion used to
# rank these as the "best" models.
coefplot::coefplot(mod9)

#Second Best Model

# Mod 8 (this comment previously said "Mod 6", but the call plots mod8)
coefplot::coefplot(mod8)

#Third Best Model

# Mod 5 (this comment previously said "Mod 4", but the call plots mod5)
coefplot::coefplot(mod5)